home *** CD-ROM | disk | FTP | other *** search
- /* -*- C -*-
- * CVTPARSE.C
- *
- * (c)Copyright 1993 by Tobias Ferber, All Rights Reserved
- */
-
- #include <stdarg.h>
- #include <string.h>
- #include <stdlib.h>
- #include <stdio.h>
-
- #include "cvt.h"
-
- #ifndef isprint
- #define isprint(c) (' '<=(c) && (c)<='~')
- #endif /* !isprint */
-
-
- /*** / LERROR / ***/
-
- void lerror(long line, const char *fmt, ...)
- /* gibt eine Fehlermeldung 'fmt' f"ur die Zeile #line aus. */
- {
- va_list argp;
- va_start(argp,fmt);
- fprintf(ferr,"line %ld: ",line);
- vfprintf(ferr,(char *)fmt,argp);
- fprintf(ferr,"\n");
- fflush(ferr);
- ++global_numerrors;
- va_end(argp);
- }
-
-
- /*** / READCRULE / ***/
-
- crule_t *readcrule(FILE *fp)
- /*
- Diese Funktion bildet den lexical Scanner. Sie überliest Leerzeichen,
- TABs, Zeilenwechsel und Kommentare, liest die nächste Transformationsregel
- ein und baut daraus eine crule_t Struktur auf. Ein Zeiger auf diese
- crule_t Struktur (vom Typ crule_t *) wird zurückgegeben.
-
- Fehlermeldungen werden mit lerror() ausgegeben, die Zeilennummern werden
- hier gezählt. Man beachte, daß deshalb keine andere Funktion 'fp'
- über ein Zeilenende schieben darf, da sonst die Nummerierung falsch wird.
-
- Diese Funktion überliest alle Zeichen, die Teil eines Kommentares sind.
- Ein Kommentar ist (wie in C) alles zwischen einem '/' + '*' Token
- und dem korrespondierenden '*' + '/'. Es sind auch C++ Kommentare
- (hier 'remark' genannt) erlaubt. Letztere werden durch zwei Slashes
- '/' + '/' eingeleitet und reichen dann bis zum Ende der Zeile in der
- sie auftreten. Genau wie in C (und C++) sind geschachtelte Kommentare
- (nested comments) hier NICHT erlaubt.
- */
-
- {
- static unsigned char *xhsbuf= (unsigned char *)0L;
- static unsigned long bufsize= 0L;
-
- /* Der xhs-Buffer ist statisch & dynamisch und wird beim jungfräulichen
- Aufruf dieser Funktion aufgebaut. Dabei werden zunächst 'XHSBUFSIZE'
- Bytes allokiert und bei jedem Überlauf ebensoviele mittels realloc()
- angehängt. */
-
- static unsigned long line= 1; /* line counter */
-
- crule_t *cr; /* current rule */
- unsigned long n=0; /* #of chars read into xhsbuf[] */
- unsigned char c; /* currently read character */
-
- int ccode= 0; /* char code built up by collecting digits */
- int ccdigits= 0; /* #of digits collected for ccode */
-
- char xhs='l'; /* working on the lhs ('l') or rhs ('r') */
-
- /* local script file scanner modes */
-
- typedef enum {
-
- data_mode, /* Im data_mode werden "white" characters
- * enfernt. Vom data_mode aus kann bis auf
- * in den instruction_mode in jeden anderen
- * scanner mode übergegangen werden. */
-
- instruction_mode, /* In den instruction_mode wird übergegangen,
- * wenn ein data packet beendet ist.
- * Der Scanner erwartet dann entweder ein
- * concatenation token (CATSYM) oder ein
- * derivation token `->'. Auch in diesem
- * scanner mode werden "white" characters
- * entfernt. */
-
- decimal_mode,
- hex_mode,
- octal_mode, /* In diesen Modi werden die Ziffern für ein
- * character token eingesammelt. In den
- * decimal_mode kann nur vom data_mode aus
- * übergegangen werden, während die beiden
- * anderen Modi auch über den string_mode
- * erreicht werden können. */
-
- string_mode, /* Im string_mode werden die Zeichen für
- * ein data packet direkt eingesammelt, also
- * ohne Umweg über die character codes.
- * In den string_mode wird übergegangen, wenn
- * der Scanner im data_mode auf ein double
- * quote `"' stößt. Ein weiteres (nicht mit
- * einem Backslash "escapetes" double quote
- * bringt den scanner in den instruction_mode. */
-
- remark_mode,
- comment_mode, /* Diese Modi können sowohl vom data_mode
- * als auch vom instruction_mode aus erreicht
- * werden, wenn der Scanner auf den entsprechenden
- * comment leader (s.o) trifft.
- * In diesem Fall wird der momentane scanner
- * mode in einen Stack gepushed und am Ende
- * des Kommentars wieder gepoppt. */
-
- return_mode, /* In den return_mode wird übergegangen, wenn
- * die Transformationsregel komplett ist, also
- * wenn ein RTERM im instruction_mode auf der
- * rechten Seite gefunden wurde. */
-
- error_mode, /* Wenn etwas in den Schlüpfer geht, so geht
- * auch der Scanner. Nämlich in den error_mode
- * über ;) Die Zeichen bis zum nächsten RTERM
- * oder EOF werden dann überlesen. */
-
- panic_mode, /* Im Vergleich zum error_mode kehrt der scanner
- * im panic_mode sofort zurück ohne bis zum
- * nächsten RTERM weiterzulesen. */
- } smode_t;
-
- smode_t smode; /* current scanner mode */
- smode_t smstack; /* previous scanner mode; needed to restore
- * 'smode' e.g. when leaving 'remark_mode' */
-
- /* Um den von dieser Funktion allokierten xhsbuf wieder freizugeben
- genügt es, diese mit einem FILE *fp == NULL aufzurufen.
- Unabhängig von der Existenz des xhsbuf ist ein Aufruf dieser Funktion
- mit fp == NULL ungefährlich. */
-
- if(!fp)
- { if(xhsbuf)
- { free(xhsbuf);
- xhsbuf= (unsigned char *)0L;
- }
- bufsize= 0L;
- line= 1;
- return NIL(crule_t);
- }
-
- /* Initially we expect some char data */
- smode= smstack= data_mode;
-
- if(!xhsbuf)
- {
- xhsbuf= (unsigned char *)malloc(XHSBUFSIZE * sizeof(char));
-
- if(!xhsbuf)
- { echo("Not enough memory to read rules.");
- return NIL(crule_t);
- }
- bufsize= XHSBUFSIZE;
- }
-
- if( ! (cr= new()) )
- {
- lerror(line,"Ran out of memory w/ %ld rules read.",global_numrules);
- smode= panic_mode;
- }
-
- /* let's roll... */
- while( smode != return_mode &&
- smode != error_mode &&
- smode != panic_mode && !ferror(fp) /*&& !feof(fp)*/ )
- {
- c= fgetc(fp);
-
- #ifdef DEBUG
-
- #define smode_str(m) ( \
- m == data_mode ? "data" : ( \
- m == instruction_mode ? "instruction" : ( \
- m == decimal_mode ? "decimal" : ( \
- m == hex_mode ? "hex" : ( \
- m == octal_mode ? "octal" : ( \
- m == string_mode ? "string" : ( \
- m == remark_mode ? "remark" : ( \
- m == comment_mode ? "comment" : ( \
- m == return_mode ? "return" : ( \
- m == error_mode ? "error" : ( \
- m == panic_mode ? "panic" : "<unknown>" )))))))))))
-
- if(debuglevel >= 4)
- {
- printf("scanner: read ");
-
- if(isprint(c)) printf("'%c'",c);
- else printf("'\\%03o'",c);
-
- printf(" in %s_mode [%s_mode]\n",smode_str(smode),smode_str(smstack));
- }
- #endif /* DEBUG */
-
- if(feof(fp) && c!=(unsigned char)EOF)
- {
- #ifdef DEBUG
- printf("scanner: line %ld: feof()==TRUE but fgetc()==0x%02x\n",
- line, (int)(c&0xFF));
- #endif /* DEBUG */
- c= (unsigned char)EOF;
- }
-
- #ifdef _DCC /* buggy shareware DICE */
- switch( (int) c & 0xFF )
-
- #else
- switch(c)
-
- #endif
- {
-
- /**/ case ' ':
- case '\t':
- switch(smode)
- {
- case decimal_mode:
- xhsbuf[n++]= (char)ccode;
- smode= instruction_mode;
- break;
-
- case hex_mode:
- case octal_mode:
- if(smstack==string_mode)
- {
- if(ccdigits==0)
- { /* can not happen in octal_mode in string_mode */
- lerror(line,"\\x used with no following hex digits");
- smode= error_mode;
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- }
- else /* smstack != string_mode */
- {
- if(ccdigits==0)
- {
- if(smode==hex_mode)
- {
- lerror(line,"$ used with no following hex digits");
- smode= error_mode;
- }
- else /* smode==octal_mode */
- {
- xhsbuf[n++]= (char)ccode; /* == 0 */
- smode= instruction_mode;
- }
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= instruction_mode;
- }
- }
- break;
-
- case string_mode:
- xhsbuf[n++]= c;
- break;
- }
- break;
-
- /**/ case '\n':
- case '\r':
- switch(smode)
- {
- case decimal_mode:
- xhsbuf[n++]= (char)ccode;
- smode= instruction_mode;
- break;
-
- case hex_mode:
- case octal_mode:
- if(smstack==string_mode)
- {
- lerror(line,"unterminated string at EOL; missing quotes");
- smode= error_mode;
- }
- else /* smstack != string_mode */
- {
- if(ccdigits==0)
- {
- if(smode==hex_mode)
- {
- lerror(line,"$ used with no following hex digits");
- smode= error_mode;
- }
- else /* smode==octal_mode */
- {
- xhsbuf[n++]= (char)ccode; /* == 0 */
- smode= smstack; /* == instruction_mode */
- }
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == instruction_mode */
- }
- }
- break;
-
- case string_mode:
- lerror(line,"unterminated string at EOL; missing quotes");
- smode= error_mode;
- break;
-
- case remark_mode:
- smode= smstack;
- break;
- }
- line++; /* in any case */
-
- /*
- * look foreward and forget about the second character of
- * the newline tuple if there is one.
- */
-
- { int d= fgetc(fp);
- if( !(c=='\n' && d=='\r' || c=='\r' && d=='\n') )
- ungetc(d,fp);
- }
- break;
-
- /**/ case CATSYM:
- switch(smode)
- {
- case data_mode:
- lerror(line,"extra `%c' or missing character data",c);
- smode= error_mode;
- break;
-
- case instruction_mode:
- smode= data_mode;
- break;
-
- case decimal_mode:
- xhsbuf[n++]= (char)ccode;
- smode= data_mode;
- break;
-
- case hex_mode:
- case octal_mode:
- if(smstack==string_mode)
- {
- if(ccdigits==0)
- { /* can not happen in octal_mode in string mode */
- lerror(line,"\\x used with no following hex digits");
- smode= error_mode;
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- }
- else /* smstack != string_mode */
- {
- if(ccdigits==0)
- {
- if(smode==hex_mode)
- {
- lerror(line,"$ used with no following hex digits");
- smode= error_mode;
- }
- else /* smode==octal_mode */
- {
- xhsbuf[n++]= (char)ccode; /* == 0 */
- smode= data_mode;
- }
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= data_mode;
- }
- }
- break;
-
- case string_mode:
- xhsbuf[n++]= c;
- break;
- }
- break;
-
- /**/ case '-':
- switch(smode)
- {
- case data_mode:
- lerror(line,"misplaced unary minus `%c'",c);
- smode= error_mode;
- break;
-
- case instruction_mode:
- switch(c= fgetc(fp))
- {
- case '>':
- if(xhs=='l')
- {
- if(n>0)
- {
- char *t;
-
- if( t= (char *)malloc(n*sizeof(char)) )
- {
- memcpy(t, xhsbuf, n);
- cr->lhs= t;
- cr->l= n;
- xhs= 'r';
- smode= data_mode;
- n= 0;
- }
- else /* t == (char *)0 */
- {
- lerror(line,"not enough memory for another %d bytes lhs",n);
- smode= panic_mode;
- }
- }
- else /* n==0 */
- {
- lerror(line,"malformed or empty lhs; misplaced `->'");
- smode= error_mode;
- }
- }
- else /* xhs=='r' */
- {
- lerror(line,"more than one derivation token `->' in this rule");
- smode= error_mode;
- }
- break;
-
- default:
- lerror(line,"misplaced minus sign `-'; subtraction not supported",c);
- smode= error_mode;
- break;
- }
- break;
-
- case decimal_mode:
- xhsbuf[n++]= (char)ccode;
- smode= instruction_mode;
- ungetc(c,fp);
- break;
-
- case hex_mode:
- case octal_mode:
- if(smstack==string_mode)
- {
- if(ccdigits==0)
- { /* can not happen in octal_mode in string_mode */
- lerror(line,"\\x used with no following hex digits");
- smode= error_mode;
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- }
- else /* smstack != string_mode */
- {
- if(ccdigits==0)
- {
- if(smode==hex_mode)
- {
- lerror(line,"$ used with no following hex digits");
- smode= error_mode;
- }
- else /* smode==octal_mode */
- {
- xhsbuf[n++]= (char)ccode; /* == 0 */
- smode= instruction_mode;
- ungetc(c,fp);
- }
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= instruction_mode;
- ungetc(c,fp);
- }
- }
- break;
-
- case string_mode:
- xhsbuf[n++]= c;
- break;
- }
- break;
-
- /**/ case RTERM:
- switch(smode)
- {
- case data_mode:
- if(xhs=='r')
- {
- if(n==0) /* allow an empty RHS */
- {
- smode= return_mode;
- }
- else
- {
- lerror(line,"extra `%c' or `->' before `%c'",CATSYM,c);
- smode= error_mode;
- }
- }
- else /* xhs=='l' */
- {
- if(n==0)
- {
- lerror(line,"empty rule or extra `%c'",c);
- }
- else
- {
- lerror(line,"extra `%c' before `%c'",CATSYM,c);
- lerror(line,"missing derivation token `->'");
- }
- smode= error_mode;
- }
- break;
-
- case instruction_mode:
- if(xhs=='r')
- {
- smode= return_mode;
- }
- else
- {
- lerror(line,"missing derivation token `->'");
- smode= error_mode;
- }
- break;
-
- case decimal_mode:
- if(xhs=='r')
- {
- xhsbuf[n++]= (char)ccode;
- smode= return_mode;
- }
- else /* xhs=='l' */
- {
- lerror(line,"missing derivation token `->'");
- smode= error_mode;
- }
- break;
-
- case hex_mode:
- case octal_mode:
- if(xhs=='r')
- {
- if(smstack==string_mode)
- {
- if(ccdigits==0)
- { /* can not happen in octal_mode in string_mode */
- lerror(line,"\\x used with no following hex digits");
- smode= error_mode;
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- }
- else /* smstack != string_mode */
- {
- if(ccdigits==0)
- {
- if(smode==hex_mode)
- {
- lerror(line,"$ used with no following hex digits");
- smode= error_mode;
- }
- else /* smode==octal_mode */
- {
- xhsbuf[n++]= (char)ccode; /* == 0 */
- smode= return_mode;
- }
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= return_mode;
- }
- }
- }
- else /* xhs=='l' */
- {
- lerror(line,"missing derivation token `->'");
- smode= error_mode;
- }
- break;
-
- case string_mode:
- xhsbuf[n++]= c;
- break;
- }
- break;
-
- /* Ein EOF character kann auch auftreten wenn das Ende des
- Streams noch gar nicht erreicht ist. Es werden also diese
- beiden Fälle separat betrachtet: */
-
- /**/ case (unsigned char)EOF:
- switch(smode)
- {
- case data_mode:
- if( feof(fp) )
- {
- if(xhs=='l')
- {
- if(n > 0)
- {
- lerror(line,"malformed rule at EOF; missing `->'");
- smode= error_mode;
- }
- /* else wird am Ende der Schleife sowieso behandelt */
- }
- else /* xhs=='r' */
- {
- if(n==0)
- {
- lerror(line,"missing `%c' at EOF; rhs is empty",RTERM);
- }
- else
- {
- lerror(line,"malformed rhs at EOF; expected `%c' (not `%c')",RTERM,CATSYM);
- }
- smode= error_mode;
- }
- }
- else /* !feof(fp) */
- {
- lerror(line,"misplaced EOF character `\\x%02x'",c);
- smode= error_mode;
- }
- break;
-
- case instruction_mode:
- if( feof(fp) )
- {
- if(xhs=='l')
- {
- lerror(line,"malformed rule at EOF; incomplete lhs");
- }
- else
- {
- lerror(line,"unexpected EOF; missing `%c'",RTERM);
- }
- smode= error_mode;
- }
- else /* !feof(fp) */
- {
- lerror(line,"misplaced EOF character `\\x%02x'",c);
- smode= error_mode;
- }
- break;
-
- case decimal_mode:
- case hex_mode:
- case octal_mode:
- if( feof(fp) )
- {
- lerror(line,"unexpected end of input; missing `%c'",RTERM);
- }
- else
- {
- lerror(line,"misplaced EOF character `\\x%02x'",c);
- }
- smode= error_mode;
- break;
-
- case string_mode:
- if( feof(fp) )
- {
- lerror(line,"unterminated string at EOF");
- smode= error_mode;
- }
- else /* !feof(fp) */
- {
- xhsbuf[n++]= c;
- }
- break;
-
- case comment_mode:
- if( feof(fp) ) /* else ist dann Wurschd */
- {
- lerror(line,"unterminated comment at EOF; closing `*/' missing");
- smode= error_mode;
- }
- break;
- }
- break;
-
- /**/ case '$':
- switch(smode)
- {
- case data_mode:
- smstack= instruction_mode;
- smode= hex_mode;
- ccode= 0;
- ccdigits= 0;
- break;
-
- case instruction_mode:
- case decimal_mode:
- lerror(line,"misplaced hex token `%c' or missing `%c'",c,CATSYM);
- smode= error_mode;
- break;
-
- case hex_mode:
- case octal_mode:
- if(smstack==string_mode)
- {
- if(ccdigits==0)
- { /* can not happen in octal_mode in string_mode */
- lerror(line,"\\x used with no following hex digits");
- smode= error_mode;
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- }
- else /* smstack != string_mode */
- {
- lerror(line,"misplaced hex token `%c' or missing `%c'",c,CATSYM);
- smode= error_mode;
- }
- break;
-
- case string_mode:
- xhsbuf[n++]= c;
- break;
- }
- break;
-
- /**/ case '\"':
- switch(smode)
- {
- case data_mode:
- smode= string_mode;
- break;
-
- case instruction_mode:
- case decimal_mode:
- lerror(line,"unmatched quotes or missing `%c'",CATSYM);
- smode= error_mode;
- break;
-
- case hex_mode:
- case octal_mode:
- if(smstack==string_mode)
- {
- if(ccdigits==0)
- { /* can not happen in octal_mode in string_mode */
- lerror(line,"\\x used with no following hex digits");
- smode= error_mode;
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- }
- else /* smstack != string_mode */
- {
- lerror(line,"unmatched quotes or missing `%c'",CATSYM);
- smode= error_mode;
- }
- break;
-
- case string_mode:
- smode= instruction_mode;
- break;
- }
- break;
-
- /**/ case '\\':
- switch(smode)
- {
- case data_mode:
- case instruction_mode:
- case decimal_mode:
- lerror(line,"misplaced escape character `%c' or missing quotes",c);
- smode= error_mode;
- break;
-
- case hex_mode:
- case octal_mode:
- if(smstack==string_mode)
- {
- if(ccdigits==0)
- { /* can not happen in octal_mode in string_mode */
- lerror(line,"\\x used with no following hex digits");
- smode= error_mode;
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- }
- else /* smstack != string_mode */
- {
- lerror(line,"misplaced escape character `%c' or missing quotes",c);
- smode= error_mode;
- }
- break;
-
- case string_mode:
-
- /* character constants taken from 'The C++ Programming Language'
- * Second Edition, Bjarne Stroustup, AT&T Bell Laboratories */
-
- switch(c= fgetc(fp))
- {
- case 'n': xhsbuf[n++]= '\n'; break; /* newline */
- case 't': xhsbuf[n++]= '\t'; break; /* horizontal tab */
- case 'v': xhsbuf[n++]= '\v'; break; /* vertical tab */
- case 'b': xhsbuf[n++]= '\b'; break; /* backspace */
- case 'r': xhsbuf[n++]= '\r'; break; /* carriage return */
- case 'f': xhsbuf[n++]= '\f'; break; /* form feed */
- case 'a': xhsbuf[n++]= '\a'; break; /* alert */
- case '\\': xhsbuf[n++]= '\\'; break; /* backslash */
- case '?': xhsbuf[n++]= '\?'; break; /* question mark */
- case '\'': xhsbuf[n++]= '\''; break; /* single quote */
- case '\"': xhsbuf[n++]= '\"'; break; /* double quote */
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- smstack= smode; /* == string_mode */
- smode= octal_mode;
- ccode= (int)c-'0';
- ccdigits= 1;
- break;
-
- case 'x':
- case 'X': /* how about the uppercase 'X' ? */
- smstack= smode; /* == string_mode */
- smode= hex_mode;
- ccode= 0;
- ccdigits= 0;
- break;
-
- default:
- lerror(line,
- ( isprint(c)
- ? "unknown escape sequence `\\%c'"
- : "unknown escape sequence: `\\' followed by char code 0x%x"
- ),c
- );
- smode= error_mode;
- break;
- }
- break;
- }
- break;
-
- /**/ case '/':
- switch(smode)
- {
- case data_mode:
- case instruction_mode:
- switch(c= fgetc(fp))
- {
- case '*':
- smstack= smode;
- smode= comment_mode;
- break;
-
- case '/':
- smstack= smode;
- smode= remark_mode;
- break;
-
- default:
- if(smode==data_mode)
- {
- lerror(line,"missing or unexpected slash `/'");
- }
- else /* smode==instruction_mode */
- {
- lerror(line,"misplaced single slash `/'; division not supported");
- }
- smode= error_mode;
- ungetc(c,fp);
- break;
- }
- break;
-
- case decimal_mode:
- xhsbuf[n++]= (char)ccode;
- smode= instruction_mode;
- ungetc(c,fp);
- break;
-
- case hex_mode:
- case octal_mode:
- if(smstack==string_mode)
- {
- if(ccdigits==0)
- { /* can not happen in octal_mode in string_mode */
- lerror(line,"\\x used with no following hex digits");
- smode= error_mode;
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- }
- else /* smstack != string_mode */
- { if(ccdigits==0)
- {
- if(smode==hex_mode)
- {
- lerror(line,"$ used with no following hex digits");
- smode= error_mode;
- }
- else /* smode==octal_mode */
- {
- xhsbuf[n++]= (char)ccode; /* == 0 */
- smode= instruction_mode;
- ungetc(c,fp);
- }
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= instruction_mode;
- ungetc(c,fp);
- }
- }
- break;
-
- case string_mode:
- xhsbuf[n++]= c;
- break;
- }
- break;
-
- /**/ case '*':
- switch(smode)
- {
- case comment_mode:
- switch(c= fgetc(fp))
- {
- case '/':
- smode= smstack; /* data_mode or instruction_mode */
- break;
- #ifdef OBSOLETE
- default:
- ungetc(c,fp);
- break;
- #endif
- }
- break;
-
- case decimal_mode:
- lerror(line,"misplaced asterisk `%c'; multiplication not supported",c);
- smode= error_mode;
- break;
-
- case hex_mode:
- case octal_mode:
- if(smstack==string_mode)
- {
- if(ccdigits==0)
- { /* can not happen in octal_mode in string_mode */
- lerror(line,"\\x used with no following hex digits");
- smode= error_mode;
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- }
- else /* smstack != string_mode */
- {
- lerror(line,"misplaced asterisk `%c'; multiplication not supported",c);
- smode= error_mode;
- }
- break;
-
- case string_mode:
- xhsbuf[n++]= c;
- break;
-
- case data_mode:
- case instruction_mode:
- lerror(line,"misplaced or unexpected asterisk `%c'",c);
- break;
- }
- break;
-
- /**/ case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- switch(smode)
- {
- case data_mode:
- smstack= instruction_mode;
- if(c=='0')
- {
- smode= octal_mode;
- ccdigits= 0; /* allow another 'global_maxoctdigits' */
- }
- else /* c > '0' */
- {
- smode= decimal_mode;
- }
- ccode= (int)c-'0';
- break;
-
- case instruction_mode:
- lerror(line,"misplaced digit `%c' -- extra space or missing `%c'",c,CATSYM);
- smode= error_mode;
- break;
-
- case decimal_mode:
- ccode= ccode * 10 + c-'0';
- if(ccode > global_numchars)
- {
- lerror(line,"decimal value out of range for character (> %d)",
- global_numchars-1);
- smode= error_mode;
- }
- break;
-
- case hex_mode:
- {
- int cc= ccode * 0x10 + c-'0';
-
- if(cc >= global_numchars)
- {
- if(smstack==string_mode)
- {
- if(ccdigits==0)
- { /* can only happen if global_numchars < 0x0F */
- lerror(line,"\\x used with no following hex digits");
- smode= error_mode;
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- }
- else /* smstack != string_mode */
- {
- lerror(line,"hex value out of range for character (> $%x)",
- global_numchars-1);
- smode= error_mode;
- }
- }
- else /* cc < global_numchars */
- {
- ++ccdigits;
- if(smstack == string_mode &&
- ccdigits >= global_maxhexdigits)
- {
- xhsbuf[n++]= (char)cc;
- smode= smstack; /* == string_mode */
- }
- else ccode= cc;
- }
- }
- break;
-
- case octal_mode:
- if(c=='8' || c=='9')
- {
- if(smstack==string_mode)
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- else /* smstack != string_mode */
- {
- lerror(line,"not an octal digit: `%c'",c);
- smode= error_mode;
- }
- }
- else /* '0'<=c && c<='7' */
- {
- int cc= ccode * 010 + c-'0';
-
- if(cc >= global_numchars)
- {
- if(smstack==string_mode)
- {
- /* we can be sure that ccdigits is >= 1 */
- xhsbuf[n++]= ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- else /* smstack != string_mode */
- {
- lerror(line,"octal value out of range for character (> 0%o)",
- global_numchars-1);
- smode= error_mode;
- }
- }
- else /* cc < gobal_numchars */
- {
- ++ccdigits;
- if(smstack == string_mode &&
- ccdigits >= global_maxoctdigits)
- {
- xhsbuf[n++]= (char)cc;
- smode= smstack; /* == string_mode */
- }
- else ccode= cc;
- }
- }
- break;
-
- case string_mode:
- xhsbuf[n++]= c;
- break;
- }
- break;
-
- /**/ case 'A': case 'a':
- case 'B': case 'b':
- case 'C': case 'c':
- case 'D': case 'd':
- case 'E': case 'e':
- case 'F': case 'f':
- switch(smode)
- {
- case data_mode:
- lerror(line,"misplaced character `%c' -- missing quotes or `$'",c);
- smode= error_mode;
- break;
-
- case instruction_mode:
- lerror(line,"misplaced character `%c' -- extra space or missing quotes",c);
- smode= error_mode;
- break;
-
- case decimal_mode:
- lerror(line,"not a decimal digit: `%c'",c);
- smode= error_mode;
- break;
-
- case hex_mode:
- {
- int cc= ccode * 0x10 + toupper(c)-'A' + 0x0A;
-
- if(cc >= global_numchars)
- {
- if(smstack==string_mode)
- {
- if(ccdigits==0)
- { /* can only happen if global_numchars < 0x0F */
- lerror(line,"\\x used with no following hex digits");
- smode= error_mode;
- }
- else /* ccdigits > 0 */
- {
- xhsbuf[n++]= (char)ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- }
- else /* smstack != string_mode */
- {
- lerror(line,"hex value out of range for character (> $%x)",
- global_numchars-1);
- smode= error_mode;
- }
- }
- else /* cc < global_numchars */
- {
- ++ccdigits;
- if(smstack == string_mode &&
- ccdigits >= global_maxhexdigits)
- {
- xhsbuf[n++]= (char)cc;
- smode= smstack; /* == string_mode */
- }
- else ccode= cc;
- }
- }
- break;
-
- case octal_mode:
- if(smstack==string_mode)
- {
- xhsbuf[n++]= ccode;
- smode= smstack; /* == string_mode */
- ungetc(c,fp);
- }
- else
- {
- lerror(line,"not an octal digit: `%c'",c);
- smode= error_mode;
- }
- break;
-
- case string_mode:
- xhsbuf[n++]= c;
- break;
- }
- break;
-
- /**/ default:
- switch(smode)
- {
- case data_mode:
- case instruction_mode:
- case decimal_mode:
- case hex_mode:
- case octal_mode:
- if( isprint(c) )
- {
- lerror(line,"parse error: misplaced character `%c'",c);
- }
- else /* !isprint(c) */
- {
- lerror(line,"parse error: unknown character code 0x%x",c);
- }
- smode= error_mode;
- break;
-
- case string_mode:
- xhsbuf[n++]= c;
- break;
- }
- break;
- }
-
- if(n >= bufsize)
- {
- bufsize += XHSBUFSIZE;
- xhsbuf= (unsigned char *)realloc(xhsbuf, bufsize);
- if(!xhsbuf)
- {
- lerror(line,"not enough memory to enlarge scanner buffer to %ld bytes",
- bufsize);
- smode= panic_mode;
- }
- #ifdef DEBUG
- else if(debuglevel >= 1)
- {
- printf("scanner buffer enlarged to %ld bytes in line %ld\n",
- bufsize, line);
- }
- #endif /* DEBUG */
- }
-
- /* avoid generation of further, spurious error messages */
-
- if( smode == error_mode && c != RTERM )
- {
- while( !feof(fp) && !ferror(fp) && (c= fgetc(fp)) != RTERM )
- {
- if(c=='\n' || c=='\r')
- {
- int d= fgetc(fp);
- if( !(c=='\n' && d=='\r' || c=='\r' && d=='\n') )
- ungetc(d,fp);
-
- ++line;
- }
- }
- }
- }
-
- if(smode == return_mode)
- {
- cr->ln= line;
-
- if(n>0)
- { char *t;
- if( t= (char *)malloc(n*sizeof(char)) )
- { memcpy(t, xhsbuf, n);
- cr->rhs= t;
- cr->r= n;
- ++global_numrules;
- }
- else
- {
- lerror(line,"not enough memory for another %d bytes rhs",n);
- smode= panic_mode;
- }
- }
- else /* just to be sure... */
- {
- cr->rhs= (char *)0L;
- cr->r= 0L;
- ++global_numrules;
- }
- }
-
- return (smode == return_mode) ? cr : dispose(cr);
- }
-